Introduction

This markdown provides comparison between two meQTL analysis after LD clumping. meQTL analysis were done using the R package MatrixEQTL. LD clumping was done using PLINK

For all analysis, the same DNAm data were used which underwent following QC steps:

DNAm QC

The imputed QC SNP data (no LD pruning, the MHC regions are included) were used. The data includes 3’957’338 SNPs.

All meQTLs passed the significance threshold of FDR = 0.05.

LD clumping was done per CpG using the following parameters:

–clump-p1 = 0.05: Significance threshold for index SNPs

–clump-p2 = 1: Secondary significance threshold for clumped SNPs

–clump-r2 = 0.2: LD threshold for clumping

–clump-kb = 200: Physical distance threshold for clumping

Results

Significant Hits, FDR < 0.05

x %>%
    ggplot(aes(y = cnt, x = Var2, fill = as.factor(as.numeric(Var2)))) + geom_col() + geom_text(aes(label = comma(cnt,
    accuracy = 1L), y = cnt), stat = "identity", vjust = -0.2, size = 2.5) + scale_y_continuous(labels = scientific) +
    # scale_fill_viridis(discrete = TRUE, alpha = 0.9, option = 'C') +
labs(title = " ", y = "Count", x = " ") + facet_wrap(~Var1, ncol = 3) + theme(legend.position = "none",
    legend.title = element_blank(), panel.grid.major = element_blank(), panel.background = element_blank(),
    plot.title = element_text(size = 10), axis.title = element_text(size = 8), axis.title.x = element_blank()) +
    scale_fill_manual(values = cbPalette[c(1, 2)])

Scatter plot

fdr.thr <- 0.05
plot.title <- paste0("Significant at FDR <= ", fdr.thr, " meQTLs.", "\nResult of MatrixEQTL")
GetScatterPlot2(meqtl.all.full.df, plot.title = plot.title, cbPalette = cbPalette)

Manhattan plot

# Load genomic locations

cpg.loc <- fread("/binder/mgp/workspace/2020_DexStim_Array_Human/dex-stim-human-array/data/integrative/matrixEQTL/cpg_locations.csv",
    select = c("CpG_ID", "chr", "pos"))

meqtl.delta.notclumped.fn <- "/binder/mgp/workspace/2020_DexStim_Array_Human/dex-stim-human-array/output/data/integrative/matrixEQTL/meqtls/with_missingness/me-qtl_cis_result_snps_with_na_delta_beta_fdr_005.csv"
meqtl.delta.notclumped.df <- fread(meqtl.delta.notclumped.fn, col.names = col.names)

meqtl.loc.df <- left_join(meqtl.delta.notclumped.df, cpg.loc)

Delta-GR

GetManhattanPlot(meqtl.loc.df)

Examples

The meQTL effect observed only post-dexamethasone

# paste0('Scatter plot for the independent significant at FDR <= ', fdr.thr, ' meQTLs')
intersect.cpgs <- intersect(meqtl.all.full.df[treatment == "delta", CpG_ID], meqtl.all.full.df[treatment ==
    "baseline", CpG_ID])
meqtl.id <- meqtl.all.full.df[treatment == "delta"][!(CpG_ID %in% intersect.cpgs)][fdr == min(fdr), meQTL_ID]
selected.meqtl <- meqtl.all.full.df[meQTL_ID %in% meqtl.id]  # beta == max(beta)

kable(selected.meqtl %>%
    dplyr::select(SNP, CpG_ID, FC = beta, `t-stat`, `p-value`, FDR = fdr, Treatment = treatment) %>%
    mutate_if(is.numeric, funs(as.character(signif(., 3)))))
SNP CpG_ID FC t-stat p-value FDR Treatment
rs9616713 cg08880347 0.0271 8.74 1.13e-15 1.04e-09 delta
ProcessGetBoxPlot(methyl.beta.veh.df, methyl.beta.dex.df, snp.df, selected.meqtl[1], fdr.thr = 0.05,
    plot.title = plot.title)

The most signififcant GR-response meQTL

meqtl.id <- meqtl.all.full.df[treatment == "delta"][fdr == min(fdr), meQTL_ID]
selected.meqtl <- meqtl.all.full.df[meQTL_ID %in% meqtl.id]  # beta == max(beta)

kable(selected.meqtl %>%
    dplyr::select(SNP, CpG_ID, FC = beta, `t-stat`, `p-value`, FDR = fdr, Treatment = treatment) %>%
    mutate_if(is.numeric, funs(as.character(signif(., 3)))))
SNP CpG_ID FC t-stat p-value FDR Treatment
rs12310416 cg07195891 -0.154 -19.5 9.69e-47 4.48e-42 baseline
rs12310416 cg07195891 0.0718 16.7 2.35e-39 3.21e-30 delta
ProcessGetBoxPlot(methyl.beta.veh.df, methyl.beta.dex.df, snp.df, selected.meqtl[1], fdr.thr = 0.05,
    plot.title = plot.title)

Overlap between baseline and delta GR

meQTL level

euler.tbl <- euler(meqtls.meqtls)

# For visualisation purposes:
euler.tbl$ellipses["delta", c("a", "b")] <- euler.tbl$ellipses["delta", c("a", "b")] * 5

plot(euler.tbl, quantities = list(type = c("counts", "percent")), fills = alpha(cbPalette[c(2, 1)], 0.5),
    col = alpha(cbPalette[c(2, 1)], 1), labels = c("GR-response", "Baseline"))

SNP level

euler.tbl <- euler(meqtls.snps)

# For visualisation purposes:
euler.tbl$ellipses["delta", c("a", "b")] <- euler.tbl$ellipses["delta", c("a", "b")] * 5

plot(euler.tbl, quantities = list(type = c("counts", "percent")), fills = alpha(cbPalette[c(2, 1)], 0.5),
    col = alpha(cbPalette[c(2, 1)], 1), labels = c("GR-response", "Baseline"))

CpG level

euler.tbl <- euler(meqtls.cpgs)

# For visualisation purposes:
euler.tbl$ellipses["delta", c("a", "b")] <- euler.tbl$ellipses["delta", c("a", "b")] * 5

plot(euler.tbl, quantities = list(type = c("counts", "percent")), fills = alpha(cbPalette[c(2, 1)], 0.5),
    col = alpha(cbPalette[c(2, 1)], 1), labels = c("GR-response", "Baseline"))

Distances SNP to CpG

GR-response

ggplot(bins.df, aes(DistanceInterval)) + geom_bar(position = "identity", fill = cbPalette[2], colour = cbPalette[2],
    alpha = 0.5) + stat_count(geom = "text", aes(label = comma(..count.., accuracy = 1L)), position = position_dodge(1),
    vjust = -1, colour = "black", cex = 3) + theme(legend.position = c(0.1, 0.9), legend.title = element_blank(),
    panel.grid.major = element_blank(), panel.background = element_blank(), axis.text.y = element_blank(),
    axis.text.x = element_text(angle = 45, vjust = 0.8, size = 10, color = "black"), plot.title = element_text(size = 8),
    axis.title = element_text(size = 14)) + labs(title = paste0("Distribution of GR-response cis-meQTLs, FDR = ",
    fdr.thr), x = "Distance Interval, bp", y = "No. cis-meQTLs") + scale_fill_manual(values = cbPalette[2])

Baseline

ggplot(bins.df, aes(DistanceInterval)) + geom_bar(position = "identity", fill = cbPalette[1], colour = cbPalette[1],
    alpha = 0.5) + stat_count(geom = "text", aes(label = label_number(scale = 0.001, suffix = "K", accuracy = 1)(..count..)),
    position = position_dodge(1), vjust = -1, colour = "black", cex = 3) + theme(legend.position = c(0.1,
    0.9), legend.title = element_blank(), panel.grid.major = element_blank(), panel.background = element_blank(),
    axis.text.y = element_blank(), axis.text.x = element_text(angle = 45, vjust = 0.8, size = 10, color = "black"),
    plot.title = element_text(size = 8), axis.title = element_text(size = 14)) + labs(title = paste0("Distribution of baseline cis-meQTLs, FDR = ",
    fdr.thr), x = "Distance Interval, bp", y = "No. cis-meQTLs") + scale_fill_manual(values = cbPalette[2])

FDR

GR-response

ggplot(meqtl.delta.dist.df, aes(x = dist, y = -log10(fdr))) + geom_point(colour = cbPalette[2]) + theme(legend.position = c(0.1,
    0.9), legend.title = element_blank(), panel.grid.major = element_blank(), panel.background = element_blank(),
    plot.title = element_text(size = 10), axis.title = element_text(size = 8)) + labs(title = "Delta meQTL p-values plotted against distance from the CpG
nearest sites in a +/- 1Mb window relative to each CpG site for each delta meQTL SNP.",
    x = "Distance from SNP, bp", y = "-log10 FDR")

Baseline

ggplot(meqtl.veh.dist.df, aes(x = dist, y = -log10(fdr))) + geom_point(colour = cbPalette[1]) + theme(legend.position = c(0.1,
    0.9), legend.title = element_blank(), panel.grid.major = element_blank(), panel.background = element_blank(),
    plot.title = element_text(size = 10), axis.title = element_text(size = 8)) + labs(title = "Delta meQTL p-values plotted against distance from the CpG
nearest sites in a +/- 1Mb window relative to each CpG site for each delta meQTL SNP.",
    x = "Distance from SNP, bp", y = "-log10 FDR")

FC

GR-response

ggplot(meqtl.delta.dist.df, aes(x = dist, y = beta)) + geom_point(colour = cbPalette[2]) + theme(legend.position = c(0.1,
    0.9), legend.title = element_blank(), panel.grid.major = element_blank(), panel.background = element_blank(),
    plot.title = element_text(size = 10), axis.title = element_text(size = 8)) + labs(title = "Delta meQTL effect size plotted against distance from the CpG
nearest sites in a +/- 1Mb window relative to each CpG site for each delta meQTL SNP.",
    x = "Distance from SNP, bp", y = "Effect size")

Baseline

ggplot(meqtl.veh.dist.df, aes(x = dist, y = beta)) + geom_point(colour = cbPalette[1]) + theme(legend.position = c(0.1,
    0.9), legend.title = element_blank(), panel.grid.major = element_blank(), panel.background = element_blank(),
    plot.title = element_text(size = 10), axis.title = element_text(size = 8)) + labs(title = "Delta meQTL effect size plotted against distance from the CpG
nearest sites in a +/- 1Mb window relative to each CpG site for each delta meQTL SNP.",
    x = "Distance from SNP, bp", y = "Effect size")